==================================

Samples Distribution Map

==================================

Generate a map of the distribution of M. assimilis tissue samples across Australia coded by collection date

# load libraries
library(oz)
library(readxl)

# load dataset: one `Latitude-i` / `Longitude-i` column pair per collection
# period (i = 1..7)
samples <- read_excel("Samples-Geo-Date.xlsx")

# blues from light to dark encode collection date (earliest -> latest period)
period_colours <- c("#85C1E9", "#5DADE2", "#3498DB", "#2E86C1",
                    "#2874A6", "#21618C", "#1B4F72")

# plot Australia

oz()

# plot points: one layer of filled triangles (pch 24) per collection period,
# replacing the 14 hand-written latN/longN vectors and 7 points() calls with
# a single loop over the period index
for (i in seq_along(period_colours)) {
  points(samples[[paste0("Longitude-", i)]],
         samples[[paste0("Latitude-", i)]],
         col = period_colours[i], bg = period_colours[i], pch = 24)
}

==================================

SNP Parameters

==================================

Plotting parameters calculated for SNP filtering, including Coverage Depth, Individual Missingness, Average Site Depth, and Site Missingness.

Coverage Depth

# load libraries
library(ggplot2)
library(readxl)

# load dataset
# this is an excel file containing the coverage of each individual sample as
# estimated using SamTools
coverage <- read_excel("coverage.xlsx")

# histogram of the distribution of sample coverage, binned in steps of 1 up
# to a depth of 8; the column is referenced inside aes() (not via coverage$...)
# so ggplot evaluates it in the data, and after_stat(count) replaces the
# deprecated `..count..` notation
ggplot(coverage, aes(`Coverage Depth`)) +
  geom_histogram(breaks = seq(0, 8, by = 1), col = "grey1",
                 aes(fill = after_stat(count))) +
  scale_fill_gradient("Count", high = "paleturquoise4",
                      low = "paleturquoise1") +
  labs(x = "Coverage Depth", y = "Frequency") +
  theme_bw()

Average Site Depth

# load libraries
library(ggplot2)
library(readxl)

# load dataset 
# this is an excel file containing the average depth of 100,000 random sites
# trimmed of sites with average depth >8 as these sites drag the histogram out
# such that the resolution is unreadable 
meansitedepth <- read_excel("mean-site-depth-trimmed-2.xlsx")
# per-site average depths
sitedepth <- meansitedepth$`Mean Site Depth`

# histogram of average site depth
hist(sitedepth, breaks = 100, main = "Average Site Depth",
     xlab = "Average Site Depth")

# average depth; named mean_depth (not `mean`) so base::mean() is not
# shadowed for the rest of the session
mean_depth <- mean(sitedepth)

# standard deviation of site depth
sd_depth <- sd(sitedepth)

# cutoff = mean + 3 standard deviations: sites above this are usually
# sequencing artefacts or paralogs
cutoff <- mean_depth + 3 * sd_depth

# mark the average site depth (red) and the cutoff (blue) on the histogram
abline(v = mean_depth, col = "red", lwd = 2)
abline(v = cutoff, col = "blue", lwd = 2)

Individual Missingness

# load libraries
library(ggplot2)    
library(readxl)

# load dataset
# this is an excel file containing the individual missingness values for all
# samples across all 25 chromosomes 
missingness <- read_excel("missingness.xlsx")
# plot missingness as a histogram, binned in 0.1 steps up to 0.8
# NOTE(review): aes(missingness) refers to the data-frame object itself, not a
# named column; this only works if the sheet holds a single column — confirm,
# or reference the column explicitly. Also, `..count..` is deprecated in
# current ggplot2 in favour of after_stat(count).
ggplot (missingness, aes(missingness)) + geom_histogram(breaks=seq(0,0.8, by =0.1), col="grey1", aes(fill=..count..)) + scale_fill_gradient("Count", high = "red4", low = "palevioletred2") + labs(x="Individual Missingness", y = "Frequency") + theme_bw()

Site Missingness

# load libraries 
library(ggplot2)    
library(readxl)

# load dataset
# this is an excel file containing the missingness of 100,000 sites across
# all 25 chromosomes
sitemissingness <- read_excel("site-missingness.xlsx")
# plot site missingness in 0.1-wide bins; the column is referenced inside
# aes() (not via sitemissingness$...) so ggplot evaluates it in the data, and
# after_stat(count) replaces the deprecated `..count..` notation
ggplot(sitemissingness, aes(`site-missingness`)) +
  geom_histogram(breaks = seq(0, 1, by = 0.1), col = "grey1",
                 aes(fill = after_stat(count))) +
  scale_fill_gradient("Count", high = "goldenrod2", low = "lightgoldenrod2") +
  labs(x = "Site Missingness", y = "Frequency") +
  theme_bw()

==================================

Data Analysis

==================================

==================================

PCA

==================================

# load libraries

library(ggplot2)
library(readxl) 
library(RcppCNPy)     # for reading .npy (NUMPY) files
library(sf) 
## Linking to GEOS 3.6.1, GDAL 2.1.3, PROJ 4.9.3
# load datasets
# PCA.cov.npy is the covariance matrix without outlier individuals filtered, n=133
# PCA.filtered.cov.npy is the covariance matrix with outlier individuals filtered, # n = 119
# PCA.climate.cov.npy is the covariance matrix with individuals from PCA.filtered.cov.npy who are missing climate data removed
covmatrix <- npyLoad("PCA.cov.npy")
covmatrixfiltered <- npyLoad("PCA.filtered.cov.npy")
climcovmatrix <- npyLoad("PCA.climate.cov.npy")

# eigendecomposition of the filtered covariance matrix: decompose once and
# reuse, instead of calling eigen() separately for the values and the vectors
eig <- eigen(covmatrixfiltered)
# eigenvalues (variance explained by each component)
Eigenvalues <- eig$values
# eigenvectors (axes of the principal-component space)
Eigenvectors <- eig$vectors
# calculate the principal components using a matrix multiplication
PC <- as.matrix(covmatrixfiltered) %*% Eigenvectors
# transpose the data in order to get the loading of the variables for each
# principal component
PCs <- t(PC)

# calculate the proportions of the variation explained by the various
# components (eigenvalue_i / sum of all eigenvalues, as a percentage;
# the output below shows PC1 explains 3.18%)
print(round(Eigenvalues/sum(Eigenvalues) * 100, digits = 2))
##   [1] 3.18 1.23 1.19 1.09 1.05 1.04 1.03 1.00 0.98 0.97 0.96 0.95 0.95 0.94
##  [15] 0.94 0.93 0.92 0.92 0.91 0.90 0.90 0.88 0.88 0.88 0.87 0.86 0.86 0.86
##  [29] 0.85 0.84 0.84 0.84 0.83 0.83 0.83 0.83 0.83 0.83 0.83 0.83 0.83 0.82
##  [43] 0.82 0.82 0.82 0.82 0.82 0.82 0.82 0.82 0.82 0.81 0.81 0.81 0.81 0.81
##  [57] 0.81 0.81 0.81 0.81 0.81 0.81 0.81 0.81 0.81 0.81 0.81 0.81 0.81 0.80
##  [71] 0.80 0.80 0.80 0.80 0.80 0.80 0.80 0.80 0.80 0.80 0.80 0.80 0.80 0.80
##  [85] 0.80 0.80 0.80 0.80 0.79 0.79 0.79 0.79 0.79 0.79 0.79 0.79 0.79 0.79
##  [99] 0.79 0.79 0.78 0.78 0.78 0.78 0.78 0.77 0.76 0.75 0.68 0.68 0.65 0.64
## [113] 0.64 0.61 0.59 0.58 0.58 0.57 0.22
# cumulative percentage of variation explained (reaches 100% at component 119)
round(cumsum(Eigenvalues)/sum(Eigenvalues) * 100, digits = 2)
##   [1]   3.18   4.41   5.60   6.69   7.74   8.78   9.81  10.81  11.79  12.76
##  [11]  13.73  14.68  15.63  16.57  17.51  18.44  19.37  20.29  21.19  22.09
##  [21]  22.99  23.87  24.75  25.63  26.50  27.36  28.22  29.08  29.93  30.77
##  [31]  31.61  32.45  33.28  34.11  34.94  35.77  36.60  37.43  38.26  39.08
##  [41]  39.91  40.73  41.56  42.38  43.20  44.02  44.84  45.66  46.47  47.29
##  [51]  48.10  48.92  49.73  50.55  51.36  52.17  52.98  53.79  54.60  55.41
##  [61]  56.22  57.03  57.84  58.65  59.46  60.26  61.07  61.88  62.68  63.49
##  [71]  64.29  65.09  65.90  66.70  67.50  68.30  69.10  69.91  70.71  71.51
##  [81]  72.30  73.10  73.90  74.70  75.50  76.30  77.09  77.89  78.68  79.48
##  [91]  80.27  81.06  81.85  82.64  83.43  84.22  85.01  85.79  86.58  87.36
## [101]  88.15  88.93  89.71  90.49  91.27  92.04  92.80  93.56  94.24  94.92
## [111]  95.57  96.21  96.85  97.46  98.05  98.63  99.21  99.78 100.00
# take the principal component scores for individuals (the first three
# columns of PC) to create the PCA plot
PC1 <- PC[,1]
PC2 <- PC[,2]
PC3 <- PC[,3]

# transform into a dataframe and copied into an excel spreadsheet to form the
# principal component values in PCA-metadata.xlsx
PCA <- data.frame(PC1, PC2, PC3)
# create a biplot by hand
# load libraries
library(readxl)

# load dataset 
biplot.pca.metadata <- read_excel("Biplot Metadata.xlsx")
biplot.metadata <- read_excel("PCA-metadata.xlsx")

# set pch and colour groups (grouped by geography: NSW, NT, QLD, SA, VIC, WA, NA)
# group sizes are 10 + 10 + 10 + 43 + 1 + 43 + 2 = 119 individuals and must
# match the row order of biplot.pca.metadata — TODO confirm against the sheet
pch.group <- c(rep(21, times=10), rep(22, times=10), rep(23, times=10), rep(24, times=43), rep(3, times=1), rep(25, times=43), rep(4, times=2))

# red = NSW, gold = NT, green = QLD, blue = SA, black = VIC, pink = WA, black X = N.A
col.group <- c(rep("red", times=10), rep("gold", times=10), rep("green", times=10), rep("skyblue2", times=43), rep("black", times=1), rep("violet", times=43), rep("grey", times=2))

# plot the individuals in principal component space grouped by their shape and
# colour for state of collection (axis labels carry the % variance from the
# eigenvalue output: PC1 3.18%, PC2 1.23%)
plot(x=biplot.pca.metadata$PC1, y=biplot.pca.metadata$PC2, xlab="PC1 (3.18%)", ylab="PC2 (1.23%)", xlim=c(-1.0,1.0),ylim=c(-0.6,0.6), pch=pch.group, col="black", bg=col.group, cex=2, las=1, asp=1)

# add horizontal and vertical axes at zero 
abline(v=0, lty=2, col="grey50")
abline(h=0, lty=2, col="grey50")

# set the x and y co-ordinates of the variables, which are equivalent to
# the loading scores of the variables (i.e. the individual samples)
# these will form the arrows on the biplot and were created by transposing the 
# principal component data made above (check PCs)
l.x <- PCs[,1]
l.y <- PCs[,2]
arrows(x0=0, x1=l.x, y0=0, y1=l.y, col="red", length = 0.1, lwd = 1.5)

# set text labels: put each label below its arrow tip (pos = "1") when the
# arrow points down (l.y < 0) and above it (pos = "3") when it points up
# NOTE(review): an entry with l.y exactly 0 keeps its numeric value and is
# passed straight to text() as pos — confirm this case cannot occur
l.pos <- l.y
lo <- which(l.y < 0)
hi <- which(l.y > 0)
l.pos <- replace(l.pos, lo, "1")
l.pos <- replace(l.pos, hi, "3")
text(l.x, l.y, labels=biplot.pca.metadata$`SAMPLE ID`, col="red", pos=l.pos)

# reload the PCA scores together with the sample metadata
PCA <- read_excel("PCA-metadata.xlsx")

# principal components shaped by collection period and coloured by state of
# collection
ggplot(PCA, aes(x = PC1, y = PC2)) +
  geom_point(aes(shape = Period, color = STATE),
             position = "jitter", size = 2.5) +
  scale_shape_manual(values = c(3, 4, 8, 15, 17, 18, 19, 25)) +
  theme_bw()

# principal components shaped by collection period and coloured by individual
# missingness
ggplot(PCA, aes(x = PC1, y = PC2)) +
  geom_point(aes(shape = Period, color = MISSINGNESS),
             position = "jitter", size = 2.5) +
  scale_shape_manual(values = c(3, 4, 8, 15, 17, 18, 19, 25)) +
  scale_color_continuous(high = "black", low = "turquoise2") +
  theme_bw()

# map the samples by latitude and longitude, coloured on a scale of principal
# component values

australia <- st_read("australia.shp")
## Reading layer `australia' from data source `/Users/Elroy/Documents/University/Honours 2019/Everything/BIOINFOMATICS/australia.shp' using driver `ESRI Shapefile'
## Simple feature collection with 8 features and 59 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 112.9194 ymin: -43.64129 xmax: 153.6306 ymax: -9.240167
## epsg (SRID):    NA
## proj4string:    NA
a <- ggplot() +
  geom_sf(data = australia, color = "black", fill = "white")
a +
  geom_point(data = PCA,
             aes(x = LONGITUDE, y = LATITUDE, color = PC1),
             size = 2.5) +
  scale_color_gradient2()
## Warning: Removed 5 rows containing missing values (geom_point).

==================================

Linear Regression

==================================

# load libraries

library(ggplot2)   
library(readxl)     
library(tidyverse)  # for correlation matrix
## ── Attaching packages ────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble  2.1.3     ✔ purrr   0.3.2
## ✔ tidyr   1.0.0     ✔ dplyr   0.8.3
## ✔ readr   1.3.1     ✔ stringr 1.4.0
## ✔ tibble  2.1.3     ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# load the new data
PCA <- read_excel("PCA-metadata.xlsx")

# linear regression model of PC1 values and individual missingness — checks
# whether PC1 merely reflects data quality (it does not: p = 0.109 below)
regression <- lm(PC1~MISSINGNESS, data = PCA)

summary(regression) 
## 
## Call:
## lm(formula = PC1 ~ MISSINGNESS, data = PCA)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6637 -0.2794 -0.1337  0.3312  0.8308 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.08118    0.06700  -1.212    0.228
## MISSINGNESS  0.84386    0.52296   1.614    0.109
## 
## Residual standard error: 0.3809 on 117 degrees of freedom
## Multiple R-squared:  0.02177,    Adjusted R-squared:  0.01341 
## F-statistic: 2.604 on 1 and 117 DF,  p-value: 0.1093
# calculate the Pearson's correlation coefficient between missingness and PC1
# (r = 0.148, n.s., consistent with the regression above)
cPC1 <- PCA$PC1
cMiss <- PCA$MISSINGNESS
cor.test(cMiss, cPC1, method=c("pearson")) 
## 
##  Pearson's product-moment correlation
## 
## data:  cMiss and cPC1
## t = 1.6136, df = 117, p-value = 0.1093
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.03333433  0.31906832
## sample estimates:
##       cor 
## 0.1475465
# create a correlation matrix of the first two PCs against collection year
# and geographic coordinates (pairwise complete observations, so rows with
# missing coordinates are kept for the other pairs)
cor(PCA %>% select(PC1, PC2, Year, LATITUDE, LONGITUDE), use="pairwise.complete.obs")
##                    PC1          PC2       Year   LATITUDE  LONGITUDE
## PC1        1.000000000 -0.008453158 -0.3630122 -0.5775262  0.6651945
## PC2       -0.008453158  1.000000000  0.1581540  0.6096030  0.2317461
## Year      -0.363012228  0.158153981  1.0000000  0.4100755 -0.3038095
## LATITUDE  -0.577526165  0.609603032  0.4100755  1.0000000 -0.3364011
## LONGITUDE  0.665194486  0.231746097 -0.3038095 -0.3364011  1.0000000
# scatterplot of PC1 against missingness with the fitted lm line
ggplot(PCA, aes(y=PC1, x=MISSINGNESS)) + geom_point() + geom_smooth(method="lm") + theme_bw()

==================================

NGSadmix - ADMIXTURE

==================================

# load libraries
library(RColorBrewer)

# load the data 

# population/metadata table: column 2 holds the population label used below
# for ordering and axis labels
pop <- read.table("Admix-Pop-Data.txt", fill = TRUE, header = FALSE)

# NGSadmix admixture proportions, one file per K
# (rows = individuals, columns = ancestry components)
q2 <- read.table("admix.2.txt", fill = TRUE, header = FALSE)

q3 <- read.table("admix.3.txt", fill = TRUE, header = FALSE)

q4 <- read.table("admix.4.txt", fill = TRUE, header = FALSE)

q5 <- read.table("admix.5.txt", fill = TRUE, header = FALSE)

q6 <- read.table("admix.6.txt", fill = TRUE, header = FALSE)


# order individuals according to population (column 2 of pop)
ord <- order (pop[,2])

# plot admixture for K = 2: one stacked bar of ancestry proportions per
# individual, in population order
barplot(t(q2)[,ord],col=2:10, space=0, border=NA, xlab="Individuals", ylab="AdmixtureProportions(K=2)") 

# plot admixture for K = 3 using a ColorBrewer palette
barplot(t(q3)[,ord],col=brewer.pal(n=3, name="RdBu"),   
        space=0, border=NA, xlab="Individuals",         
        ylab="Admixture Proportions (K=3)")

# add population labels, each centred under its population's run of bars
text(tapply(1:nrow(pop), pop[ord,2], mean),-0.05, unique(pop[ord,2]),xpd=T)  

# add lines between each individual and population
# NOTE(review): the white separators are computed from column 1 of pop while
# the ordering and the black population borders use column 2 — confirm the
# two columns agree, otherwise the white lines land in the wrong places
abline(v=cumsum(sapply(unique(pop[ord,1]), function(x){sum(pop[ord,1]==x)})), col="white",lwd=0.5)

abline(v=cumsum(sapply(unique(pop[ord,2]), function(x){sum(pop[ord,2]==x)})), col=1,lwd=2) 

==================================

ADMIXTURE SCATTER PLOTS

==================================

# load libraries

library(ggplot2)       
library(ggrepel)        
library(scatterpie)     
library(sf) 
library(readxl)

# load admixture proportions plus sample coordinates
admixture <- read_excel("admix-metadata.xlsx")

# load shapefile of Australia

australia <- st_read("australia.shp")
## Reading layer `australia' from data source `/Users/Elroy/Documents/University/Honours 2019/Everything/BIOINFOMATICS/australia.shp' using driver `ESRI Shapefile'
## Simple feature collection with 8 features and 59 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 112.9194 ymin: -43.64129 xmax: 153.6306 ymax: -9.240167
## epsg (SRID):    NA
## proj4string:    NA
# create a vector plot of Australia 
a <- ggplot() + geom_sf(data = australia, color = "black", fill = "white") 

# sample coordinates, shared by every value of K
Lat <- admixture$LATITUDE
Long <- admixture$LONGITUDE

# one palette for all K values (only the first K colours are used per plot)
admix_palette <- c("#F39C12", "#C39BD3", "#82E0AA", "#F7DC6F",
                   "#EC7063", "#85C1E9")

# helper: draw the Australia map with one admixture pie per sample, using the
# component columns `cols` of `df` (replaces five copies of the same chain)
pie_map <- function(df, cols) {
  a + geom_scatterpie(aes(x = Long, y = Lat), data = df, cols = cols) +
    scale_fill_manual(values = admix_palette) +
    theme_light()
}

# K = 2
admixdata2 <- data.frame(Lat, Long,
                         K1 = admixture$K2.1,
                         K2 = admixture$K2.2)
pie_map(admixdata2, c("K1", "K2"))

# K = 3
admixdata3 <- data.frame(Lat, Long,
                         K1 = admixture$K3.1,
                         K2 = admixture$K3.2,
                         K3 = admixture$K3.3)
pie_map(admixdata3, c("K1", "K2", "K3"))

# K = 4
admixdata4 <- data.frame(Lat, Long,
                         K1 = admixture$K4.1,
                         K2 = admixture$K4.2,
                         K3 = admixture$K4.3,
                         K4 = admixture$K4.4)
pie_map(admixdata4, c("K1", "K2", "K3", "K4"))

# K = 5
admixdata5 <- data.frame(Lat, Long,
                         K1 = admixture$K5.1,
                         K2 = admixture$K5.2,
                         K3 = admixture$K5.3,
                         K4 = admixture$K5.4,
                         K5 = admixture$K5.5)
pie_map(admixdata5, c("K1", "K2", "K3", "K4", "K5"))

# K = 6
# NOTE(review): reads columns K1..K6 rather than K6.1..K6.6 like the other
# K values — confirm the spreadsheet really stores the K = 6 proportions
# under these names
admixdata6 <- data.frame(Lat, Long,
                         K1 = admixture$K1,
                         K2 = admixture$K2,
                         K3 = admixture$K3,
                         K4 = admixture$K4,
                         K5 = admixture$K5,
                         K6 = admixture$K6)
pie_map(admixdata6, c("K1", "K2", "K3", "K4", "K5", "K6"))

===============================================

CALCULATING CLIMATE VARIABLES - RATE OF CHANGE

===============================================

First, estimate slopes for the change in each climate variable over time; there is a separate file for each climate variable. YearJD is the calendar year (January–December), used for winter variables; YearJJ is the July–June year, used for summer variables. Slopes are based on the 20 years of data prior to the capture year. The year of capture itself is removed because the number of months a sample was exposed to in that year varies (e.g. less than one month if caught in January).

#### HELPER: PER-SAMPLE RATE-OF-CHANGE REGRESSIONS ####
# For each sample id, fit lm(<response> ~ scale(<year_col>, scale = FALSE))
# on that sample's rows of `data`, and collect one summary row per sample:
# intercept, coefYear (slope, i.e. rate of change per year), residual df,
# adjusted R^2, summary df, F statistic, and the slope p-value.  The table is
# written to `outfile` and returned invisibly.  This replaces four copies of
# the same loop; the previous `names(sample.list) <- c("sample")` line is
# dropped — it only named the first element and had no effect on the loop.
fit_change_slopes <- function(data, response, year_col, sample_ids, outfile) {
  model_formula <- reformulate(sprintf("scale(%s, scale = FALSE)", year_col),
                               response)
  rows <- lapply(sample_ids, function(id) {
    m <- lm(model_formula, data = data[data$SAMPLE.ID == id, ])
    s <- summary(m)
    # data.frame() keeps the numeric columns numeric; the original cbind()
    # with the character sample id silently coerced every column to character
    data.frame(sample = id,
               intercept = m$coefficients[1],
               coefYear = m$coefficients[2],
               df.residual = m$df.residual,
               adj.r.square = s$adj.r.squared,
               df = s$df[2],
               fstat = s$fstatistic[1],
               pvalue = s$coefficients[2, 4],
               stringsAsFactors = FALSE)
  })
  # bind all rows at once instead of growing a data frame inside the loop
  out <- do.call(rbind, rows)
  rownames(out) <- seq_len(nrow(out))
  write.csv(out, outfile)
  invisible(out)
}

#### CHANGE IN MEAN TEMPERATURE ####

Change.Tmean <- read.csv("Tmean.ForChange.csv", header = TRUE)
summary(Change.Tmean)
##    SAMPLE.ID        YearJJ         Tmean      
##  ANWC100:  20   Min.   :1968   Min.   :11.12  
##  ANWC101:  20   1st Qu.:1988   1st Qu.:19.38  
##  ANWC102:  20   Median :1994   Median :22.14  
##  ANWC103:  20   Mean   :1994   Mean   :22.54  
##  ANWC104:  20   3rd Qu.:2000   3rd Qu.:26.73  
##  ANWC105:  20   Max.   :2014   Max.   :31.89  
##  (Other):2140
# these are missing from this data set:  "SAM9","SAM23", "SAM24", "SAM22", "SAM10", "SAM7"  
sample.list <- c("ANWC100","ANWC101","ANWC102","ANWC103","ANWC104","ANWC105","ANWC106","ANWC107","ANWC108","ANWC109","ANWC110","ANWC112","ANWC113","ANWC115","ANWC116", "ANWC117","ANWC118","ANWC119","ANWC126","ANWC128","ANWC129","ANWC130","ANWC131","ANWC132","ANWC133","ANWC134","ANWC135","ANWC136","ANWC137","ANWC138","ANWC139","ANWC140","ANWC141","ANWC142","ANWC143","ANWC144","ANWC145","ANWC146","ANWC147","ANWC148","ANWC150","ANWC151","ANWC152","ANWC153","ANWC154","ANWC155","ANWC156","ANWC157","ANWC158","ANWC159","ANWC160","ANWC161","ANWC162","ANWC163","ANWC168","ANWC170","ANWC66","ANWC67","ANWC68","ANWC69","ANWC71","ANWC72","ANWC73","ANWC75","ANWC77","ANWC78","ANWC79","ANWC80","ANWC81","ANWC84","ANWC85","ANWC88","ANWC90","ANWC92","ANWC94","SAM11","SAM12","SAM14","SAM15","SAM16","SAM17","SAM18","SAM19","SAM20","SAM21","SAM25","SAM26","SAM27","SAM28","SAM29", "SAM30","SAM31","SAM32","SAM33","SAM34","SAM35","SAM36","SAM37","SAM38","SAM39","SAM41", "SAM44","SAM45","SAM46","SAM8","WAM55","WAM56","WAM57","WAM58","WAM59","WAM60","WAM63","WAM64")

# summer variable: uses the July-June year (YearJJ)
fit_change_slopes(Change.Tmean, "Tmean", "YearJJ", sample.list,
                  "ChangeMeanTemp.csv")

#### CHANGE IN NO. DAYS OVER 35 ####

Change.Ndays35 <- read.csv("NDays35.ForChange.csv", header = TRUE)
summary(Change.Ndays35)
##    SAMPLE.ID        YearJJ        NDays35      
##  ANWC100:  20   Min.   :1968   Min.   :  0.00  
##  ANWC101:  20   1st Qu.:1988   1st Qu.: 43.00  
##  ANWC102:  20   Median :1994   Median : 84.00  
##  ANWC103:  20   Mean   :1993   Mean   : 94.56  
##  ANWC104:  20   3rd Qu.:2000   3rd Qu.:138.00  
##  ANWC105:  20   Max.   :2014   Max.   :284.00  
##  (Other):2140
# these are missing from this data set:  "SAM9","SAM23", "SAM24", "SAM22", "SAM10", "SAM7"  
sample.list <- c("ANWC100","ANWC101","ANWC102","ANWC103","ANWC104","ANWC105","ANWC106","ANWC107","ANWC108","ANWC109","ANWC110","ANWC112","ANWC113","ANWC115","ANWC116", "ANWC117","ANWC118","ANWC119","ANWC126","ANWC128","ANWC129","ANWC130","ANWC131","ANWC132","ANWC133","ANWC134","ANWC135","ANWC136","ANWC137","ANWC138","ANWC139","ANWC140","ANWC141","ANWC142","ANWC143","ANWC144","ANWC145","ANWC146","ANWC147","ANWC148","ANWC150","ANWC151","ANWC152","ANWC153","ANWC154","ANWC155","ANWC156","ANWC157","ANWC158","ANWC159","ANWC160","ANWC161","ANWC162","ANWC163","ANWC168","ANWC170","ANWC66","ANWC67","ANWC68","ANWC69","ANWC71","ANWC72","ANWC73","ANWC75","ANWC77","ANWC78","ANWC79","ANWC80","ANWC81","ANWC84","ANWC85","ANWC88","ANWC90","ANWC92","ANWC94","SAM11","SAM12","SAM14","SAM15","SAM16","SAM17","SAM18","SAM19","SAM20","SAM21","SAM25","SAM26","SAM27","SAM28","SAM29", "SAM30","SAM31","SAM32","SAM33","SAM34","SAM35","SAM36","SAM37","SAM38","SAM39","SAM41", "SAM44","SAM45","SAM46","SAM8","WAM55","WAM56","WAM57","WAM58","WAM59","WAM60","WAM63","WAM64")

# summer variable: uses the July-June year (YearJJ)
fit_change_slopes(Change.Ndays35, "NDays35", "YearJJ", sample.list,
                  "ChangeNdays35.csv")

#### CHANGE IN NO. DAYS UNDER 5 ####

Change.Ndays5 <- read.csv("NDays5.ForChange.csv", header = TRUE)
summary(Change.Ndays5)
##    SAMPLE.ID        YearJD         Ndays5      
##  ANWC100:  20   Min.   :1974   Min.   :  0.00  
##  ANWC101:  20   1st Qu.:1989   1st Qu.:  0.00  
##  ANWC102:  20   Median :1994   Median : 27.00  
##  ANWC103:  20   Mean   :1994   Mean   : 27.51  
##  ANWC104:  20   3rd Qu.:2000   3rd Qu.: 50.00  
##  ANWC105:  20   Max.   :2014   Max.   :139.00  
##  (Other):2020
# these are missing from this data set:  "SAM9","SAM23", "SAM24", "SAM22", "SAM10", "SAM7", "WAM55", "ANWC84", "ANWC85", "ANWC88", "ANWC90","ANWC92"
sample.list <- c("ANWC100","ANWC101","ANWC102","ANWC103","ANWC104","ANWC105","ANWC106","ANWC107","ANWC108","ANWC109","ANWC110","ANWC112","ANWC113","ANWC115","ANWC116", "ANWC117","ANWC118","ANWC119","ANWC126","ANWC128","ANWC129","ANWC130","ANWC131","ANWC132","ANWC133","ANWC134","ANWC135","ANWC136","ANWC137", "ANWC138","ANWC139",   "ANWC140","ANWC141","ANWC142","ANWC143","ANWC144","ANWC145","ANWC146","ANWC147", "ANWC148","ANWC150","ANWC151","ANWC152","ANWC153","ANWC154","ANWC155","ANWC156","ANWC157", "ANWC158","ANWC159","ANWC160","ANWC161","ANWC162","ANWC163","ANWC168","ANWC170","ANWC66", "ANWC67", "ANWC68", "ANWC69", "ANWC71", "ANWC72", "ANWC73", "ANWC75", "ANWC77", "ANWC78","ANWC79", "ANWC80", "ANWC81", "ANWC94","SAM11", "SAM12", "SAM14", "SAM15", "SAM16", "SAM17", "SAM18", "SAM19", "SAM20", "SAM21","SAM25", "SAM26", "SAM27", "SAM28", "SAM29", "SAM30", "SAM31", "SAM32", "SAM33", "SAM34","SAM35", "SAM36", "SAM37", "SAM38", "SAM39", "SAM41", "SAM44", "SAM45", "SAM46", "SAM8", "WAM56", "WAM57", "WAM58", "WAM59", "WAM60", "WAM63", "WAM64")

# winter variable: uses the January-December year (YearJD)
fit_change_slopes(Change.Ndays5, "Ndays5", "YearJD", sample.list,
                  "ChangeNdays5.csv")

#### CHANGE IN PRECIPITATION ####

Change.Rain <- read.csv("Rain.ForChange.csv", header = TRUE)
summary(Change.Rain)
##    SAMPLE.ID        YearJD        RainSum         
##  ANWC119:  20   Min.   :1968   Min.   :0.0000355  
##  ANWC84 :  20   1st Qu.:1988   1st Qu.:0.0067233  
##  ANWC85 :  20   Median :1994   Median :0.0105657  
##  SAM41  :  20   Mean   :1994   Mean   :0.0142994  
##  ANWC100:  19   3rd Qu.:2000   3rd Qu.:0.0195285  
##  ANWC101:  19   Max.   :2014   Max.   :0.0679789  
##  (Other):2033
# these are missing from this data set:  "SAM9","SAM23", "SAM24", "SAM22", "SAM10", "SAM7"  
sample.list <- c("ANWC100","ANWC101","ANWC102","ANWC103","ANWC104","ANWC105","ANWC106","ANWC107","ANWC108","ANWC109","ANWC110","ANWC112","ANWC113","ANWC115","ANWC116", "ANWC117","ANWC118","ANWC119","ANWC126","ANWC128","ANWC129","ANWC130","ANWC131","ANWC132","ANWC133","ANWC134","ANWC135","ANWC136","ANWC137","ANWC138","ANWC139","ANWC140","ANWC141","ANWC142","ANWC143","ANWC144","ANWC145","ANWC146","ANWC147","ANWC148","ANWC150","ANWC151","ANWC152","ANWC153","ANWC154","ANWC155","ANWC156","ANWC157","ANWC158","ANWC159","ANWC160","ANWC161","ANWC162","ANWC163","ANWC168","ANWC170","ANWC66","ANWC67","ANWC68","ANWC69","ANWC71","ANWC72","ANWC73","ANWC75","ANWC77","ANWC78","ANWC79","ANWC80","ANWC81","ANWC84","ANWC85","ANWC88","ANWC90","ANWC92","ANWC94","SAM11","SAM12","SAM14","SAM15","SAM16","SAM17","SAM18","SAM19","SAM20","SAM21","SAM25","SAM26","SAM27","SAM28","SAM29", "SAM30","SAM31","SAM32","SAM33","SAM34","SAM35","SAM36","SAM37","SAM38","SAM39","SAM41", "SAM44","SAM45","SAM46","SAM8","WAM55","WAM56","WAM57","WAM58","WAM59","WAM60","WAM63","WAM64")

# winter variable: uses the January-December year (YearJD)
fit_change_slopes(Change.Rain, "RainSum", "YearJD", sample.list,
                  "ChangeRain.csv")

# Calculate the correlation matrix to test for correlations between the
# climate variables (columns 2:17) and write it into a new csv file
climate.corr <- read.csv("ElroyCorDataSummary.csv", header = TRUE)
write.csv(cor(climate.corr[, 2:17], use = "pairwise.complete.obs"),
          "ElroyClimateCorMatrix.csv")

==================================

DISTANCE MATRIX

==================================

# how the distance matrix was calculated
# load libraries
library(geosphere) # to create the distance matrix
library(readxl)

# load the sample coordinate metadata
metadata <- read_excel("climate-metadata.xlsx")

# pull out the coordinate columns and pair them up
longitude <- metadata$LONGITUDE
latitude <- metadata$LATITUDE
coord <- data.frame(longitude, latitude)
coordmatrix <- as.matrix(coord)

# pairwise distances in metres, using geosphere's shortest distance between
# two points (distGeo)
distance <- distm(coordmatrix, fun = distGeo)

==================================

MIXED MODELS: SPATIAL

==================================

library(readxl) 
library(ecodist)  # provides MRM() used in the models below

# Load datasets
# Four parallel sets of objects are loaded: climate data, genetic covariance
# matrices and geographic distance matrices, each for (a) all individuals,
# (b) all individuals with no missing data, (c) temperate-only individuals,
# and (d) temperate-only with no missing data.

# CLIMATE DATA
# tropical + temperate individuals
climate.var <- read_excel("ClimVarMod.xlsx") 
# tropical + temperate individuals - no missing data
nmd.clim.var <- read_excel("ClimVarMod-NoMissingData.xlsx")
# temperate only individuals 
temp.climate.var <- read_excel("TemperateClimVarMod.xlsx")
# temperate only individuals - no missing data 
nmd.temp.climate.var <- read_excel("TemperateClimVarMod-NoMissingData.xlsx")

# GENETIC MATRIX
# tropical + temperate individuals 
genmatrix <- read.table("covariance-matrix.txt")
G <- as.matrix(genmatrix)
# tropical + temperate individuals - no missing data
nmd.genetic <- read.table("covariance-matrix-nmd.txt")
NG <- as.matrix(nmd.genetic)
# temperate only individuals
temp.genetic <- read.table("temperate-covariance-matrix.txt")
T.G <- as.matrix(temp.genetic)
# temperate only individuals - no missing data
nmd.temp.genetic <- read.table("covariance-matrix-nmd-temp.txt")
N.T.G <- as.matrix(nmd.temp.genetic)

# DISTANCE MATRIX
# tropical + temperate individuals
distance <- read.table("distance-matrix.txt")
D <- as.matrix(distance)
# tropical + temperate individuals - no missing data
nmd.distance <- read.table("distance-matrix-nmd.txt")
ND <- as.matrix(nmd.distance)
# temperate only individuals 
temp.distance <- read.table("temperate-distance-matrix.txt")
T.D <- as.matrix(temp.distance)
# temperate only individuals - no missing data
nmd.temp.distance <- read.table("distance-matrix-nmd-temp.txt")
N.T.D <- as.matrix(nmd.temp.distance)

# INBREEDING MODEL
# Multiple Regression on distance Matrices (ecodist::MRM): pairwise
# differences in inbreeding are regressed on geographic distance, genetic
# covariance, the climate variables, and collection year; p-values come from
# 500 permutations.
# NOTE(review): the "NAs introduced by coercion" warnings below suggest some
# climate columns were read from Excel as character — verify the column types
# in the source spreadsheets.
# tropical + temperate
MRM(dist(INBREEDING) ~ as.dist(D) + as.dist(G) + dist(ScaledNdays35five) + dist(ScaledNdays5five) + dist(ScaledRainfive) + dist(Year), data=climate.var, nperm=500)
## Warning in dist(ScaledNdays5five): NAs introduced by coercion
## Warning in dist(ScaledRainfive): NAs introduced by coercion
## $coef
##                         dist(INBREEDING)  pval
## Int                         9.304673e-02 0.068
## as.dist(D)                 -1.885499e-08 0.094
## as.dist(G)                 -1.502411e-01 0.106
## dist(ScaledNdays35five)    -3.133070e-03 0.662
## dist(ScaledNdays5five)      2.553644e-02 0.010
## dist(ScaledRainfive)       -3.210679e-03 0.640
## dist(Year)                 -1.649857e-03 0.104
## 
## $r.squared
##         R2       pval 
## 0.04690912 0.02600000 
## 
## $F.test
##        F   F.pval 
## 44.73085  0.02600
# tropical + temperate - no missing data 
MRM(dist(INBREEDING) ~ as.dist(ND) + as.dist(NG) + dist(ScaledNdays35five) + dist(ScaledNdays5five) + dist(ScaledRainfive) + dist(Year), data=nmd.clim.var, nperm=500)
## $coef
##                         dist(INBREEDING)  pval
## Int                         9.283213e-02 0.072
## as.dist(ND)                -1.866818e-08 0.092
## as.dist(NG)                -1.412163e-01 0.152
## dist(ScaledNdays35five)    -3.164813e-03 0.642
## dist(ScaledNdays5five)      2.553907e-02 0.022
## dist(ScaledRainfive)       -3.258472e-03 0.652
## dist(Year)                 -1.642770e-03 0.124
## 
## $r.squared
##         R2       pval 
## 0.04676811 0.04000000 
## 
## $F.test
##       F  F.pval 
## 44.5898  0.0400
# temperate only
# NOTE(review): unlike the genetics models below, no "temperate only - no
# missing data" inbreeding model is run here — confirm that is intentional.
MRM(dist(INBREEDING) ~ as.dist(T.D) + as.dist(T.G) + dist(ScaledNdays35five) + dist(ScaledNdays5five) + dist(ScaledRainfive) + dist(Year), data=temp.climate.var, nperm=500)
## Warning in dist(ScaledNdays5five): NAs introduced by coercion

## Warning in dist(ScaledNdays5five): NAs introduced by coercion
## $coef
##                         dist(INBREEDING)  pval
## Int                         1.103537e-01 0.428
## as.dist(T.D)                2.833225e-09 0.760
## as.dist(T.G)               -7.405639e-02 0.182
## dist(ScaledNdays35five)    -1.592697e-02 0.120
## dist(ScaledNdays5five)      2.089003e-02 0.056
## dist(ScaledRainfive)       -1.622833e-02 0.166
## dist(Year)                 -1.210449e-03 0.226
## 
## $r.squared
##         R2       pval 
## 0.03172831 0.18800000 
## 
## $F.test
##        F   F.pval 
## 13.15088  0.18800
# GENETICS MODEL
# Same predictors as above, but the response is now the pairwise genetic
# covariance itself.
# tropical + temperate
MRM(as.dist(G) ~ as.dist(D) + dist(ScaledNdays35five) + dist(ScaledNdays5five) + dist(ScaledRainfive) + dist(Year), data=climate.var, nperm=500)
## Warning in dist(ScaledNdays5five): NAs introduced by coercion

## Warning in dist(ScaledNdays5five): NAs introduced by coercion
## $coef
##                            as.dist(G)  pval
## Int                      2.609565e-02 0.002
## as.dist(D)              -2.114681e-08 0.002
## dist(ScaledNdays35five) -3.199908e-03 0.002
## dist(ScaledNdays5five)  -4.261939e-04 0.174
## dist(ScaledRainfive)     1.937645e-03 0.004
## dist(Year)              -4.798280e-04 0.002
## 
## $r.squared
##        R2      pval 
## 0.3018008 0.0020000 
## 
## $F.test
##        F   F.pval 
## 471.5049   0.0020
# tropical + temperate - no missing data 
MRM(as.dist(NG) ~ as.dist(ND) + dist(ScaledNdays35five) + dist(ScaledNdays5five) + dist(ScaledRainfive) + dist(Year), data=nmd.clim.var, nperm=500)
## $coef
##                           as.dist(NG)  pval
## Int                      2.624368e-02 0.002
## as.dist(ND)             -2.117537e-08 0.002
## dist(ScaledNdays35five) -3.629192e-03 0.002
## dist(ScaledNdays5five)  -4.347884e-04 0.210
## dist(ScaledRainfive)     1.723037e-03 0.002
## dist(Year)              -4.603036e-04 0.002
## 
## $r.squared
##        R2      pval 
## 0.3017216 0.0020000 
## 
## $F.test
##        F   F.pval 
## 471.3277   0.0020
# temperate only
MRM(as.dist(T.G) ~ as.dist(T.D) + dist(ScaledNdays35five) + dist(ScaledNdays5five) + dist(ScaledRainfive) + dist(Year), data=temp.climate.var, nperm=500)
## Warning in dist(ScaledNdays5five): NAs introduced by coercion

## Warning in dist(ScaledNdays5five): NAs introduced by coercion
## $coef
##                          as.dist(T.G)  pval
## Int                      9.412562e-03 0.782
## as.dist(T.D)            -2.359639e-09 0.014
## dist(ScaledNdays35five) -1.758287e-02 0.002
## dist(ScaledNdays5five)  -5.800697e-04 0.338
## dist(ScaledRainfive)    -1.962407e-03 0.020
## dist(Year)              -8.118931e-04 0.004
## 
## $r.squared
##         R2       pval 
## 0.07501759 0.00200000 
## 
## $F.test
##        F   F.pval 
## 39.07477  0.00200
# temperate only - no missing data 
MRM(as.dist(N.T.G) ~ as.dist(N.T.D) + dist(ScaledNdays35five) + dist(ScaledNdays5five) + dist(ScaledRainfive) + dist(Year), data=nmd.temp.climate.var, nperm=500)
## $coef
##                         as.dist(N.T.G)  pval
## Int                       1.534757e-02 0.002
## as.dist(N.T.D)           -2.030517e-08 0.002
## dist(ScaledNdays35five)  -8.872619e-03 0.004
## dist(ScaledNdays5five)    4.213988e-03 0.002
## dist(ScaledRainfive)      3.628750e-03 0.002
## dist(Year)               -5.967811e-04 0.014
## 
## $r.squared
##        R2      pval 
## 0.1934525 0.0020000 
## 
## $F.test
##       F  F.pval 
## 115.561   0.002

==================================

MIXED MODELS: TEMPORAL

==================================

# TEMPORAL INBREEDING MODEL
# Genomic mixed models (sommer::mmer) of individual inbreeding against
# climate-change variables, fitting the genomic covariance matrix as the
# random-effect covariance for individual ID via vs(ID, Gu=G).
# load libraries 
library(sommer)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: lattice
## Loading required package: crayon
## 
## Attaching package: 'crayon'
## The following object is masked from 'package:ggplot2':
## 
##     %+%
library(readxl)

# load datasets 
# these datasets have no missing data for any metadata or variables 
# CLIMATE DATA
# tropical + temperate
nmd.clim.var <- read_excel("ClimVarMod-NoMissingData.xlsx")
#temperate only
nmd.temp.clim.var <- read_excel("TemperateClimVarMod-NoMissingData.xlsx")

# GENETIC MATRIX 
# tropical + temperate 
nmd.matrix <- read.table("covariance-matrix-nmd.txt")
G <- as.matrix(nmd.matrix)
# temperate only
nmd.temp.matrix <- read.table("covariance-matrix-nmd-temp.txt")
T.G <- as.matrix(nmd.temp.matrix)

# MIXED MODEL IN SOMMER
# Fixed effects: interactions of change-in-mean-temperature with counts of
# hot (Ndays35) and/or cold (Ndays5) days, plus rainfall x rainfall change.
# Random effects: ID (with covariance G), Year, and IBRA (presumably the
# IBRA bioregion of collection — TODO confirm).
# tropical + temperate - PRIMARY (both hot- and cold-day terms)
inbreed.model <- mmer(fixed = INBREEDING ~ ScaledChangeMeanTemp * ScaledNdays35five + ScaledChangeMeanTemp * ScaledNdays5five + ScaledRainfive * ScaledChangeRain, random = ~ vs(ID, Gu=G) + Year + IBRA, rcov= ~ units, data=nmd.clim.var)
## iteration    LogLik     wall    cpu(sec)   restrained
##     1      -45.6366   22:19:43      0           2
##     2      -42.621   22:19:43      0           2
##     3      -42.0242   22:19:43      0           2
##     4      -40.9728   22:19:43      0           2
##     5      -40.3969   22:19:43      0           2
##     6      -40.3299   22:19:43      0           2
##     7      -40.3285   22:19:43      0           2
##     8      -40.3285   22:19:43      0           2
# tropical + temperate - COLD (cold-day term only)
inbreed.model2 <- mmer(fixed = INBREEDING ~ ScaledChangeMeanTemp * ScaledNdays5five + ScaledRainfive * ScaledChangeRain, random = ~ vs(ID, Gu=G) + Year + IBRA, rcov= ~ units, data=nmd.clim.var)
## iteration    LogLik     wall    cpu(sec)   restrained
##     1      -46.6681   22:19:43      0           2
##     2      -43.2365   22:19:43      0           2
##     3      -42.6376   22:19:43      0           2
##     4      -41.562   22:19:43      0           2
##     5      -40.902   22:19:43      0           2
##     6      -40.8039   22:19:43      0           2
##     7      -40.8016   22:19:43      0           2
##     8      -40.8016   22:19:43      0           2
# tropical + temperate - HOT (hot-day term only)
inbreed.model3 <- mmer(fixed = INBREEDING ~ ScaledChangeMeanTemp * ScaledNdays35five + ScaledRainfive * ScaledChangeRain, random = ~ vs(ID, Gu=G) + Year + IBRA, rcov= ~ units, data=nmd.clim.var)
## iteration    LogLik     wall    cpu(sec)   restrained
##     1      -50.3615   22:19:43      0           2
##     2      -46.3847   22:19:43      0           2
##     3      -45.7831   22:19:43      0           2
##     4      -44.6763   22:19:43      0           2
##     5      -43.8759   22:19:43      0           3
##     6      -43.7136   22:19:43      0           3
##     7      -43.7136   22:19:43      0           3
# TEMPORAL GENETICS MODEL
# PERMANOVA (vegan::adonis2) of the genetic covariance matrix against the
# same climate-change predictors, in three nested variants (both, hot-only,
# cold-only extreme-day terms).

library(vegan)
## Loading required package: permute
## This is vegan 2.5-5
## 
## Attaching package: 'vegan'
## The following object is masked from 'package:ecodist':
## 
##     mantel
library(readxl)

# load datasets 
# these datasets have no missing data for any metadata or variables 
# CLIMATE DATA
# tropical + temperate
nmd.clim.var <- read_excel("ClimVarMod-NoMissingData.xlsx")
#temperate only
nmd.temp.clim.var <- read_excel("TemperateClimVarMod-NoMissingData.xlsx")

# GENETIC MATRIX 
# tropical + temperate 
# NOTE(review): G and T.G here overwrite the same-named objects from the
# spatial section above with the no-missing-data matrices; AG <- as.matrix(G)
# is redundant (G is already a matrix) but harmless — kept for copy safety.
nmd.matrix <- read.table("covariance-matrix-nmd.txt")
G <- as.matrix(nmd.matrix)
AG <- as.matrix(G)
# convert any negative values to a zero as adonis2 doesn't like negatives 
AG[AG < 0] <- 0
# temperate only
nmd.temp.matrix <- read.table("covariance-matrix-nmd-temp.txt")
T.G <- as.matrix(nmd.temp.matrix)

# MIXED MODEL USING ADONIS 
# tropical + temperate 
temporal.model1 <- adonis2(AG ~ ScaledChangeMeanTemp*ScaledNdays35five + ScaledChangeMeanTemp*ScaledNdays5five + ScaledRainfive*ScaledChangeRain + Year + IBRA, data = nmd.clim.var)

temporal.model2 <- adonis2(AG ~ ScaledChangeMeanTemp*ScaledNdays35five + ScaledRainfive*ScaledChangeRain + Year + IBRA, data = nmd.clim.var)

temporal.model3 <- adonis2(AG ~ ScaledChangeMeanTemp*ScaledNdays5five + ScaledRainfive*ScaledChangeRain + Year + IBRA, data = nmd.clim.var)

# Calculate AICs for adonis models 
# Script source: https://github.com/kdyson/R_Scripts/blob/master/AICc_PERMANOVA.R

AICc.PERMANOVA2 <- function(adonis2.model) {
     # Calculate AIC/AICc-style information criteria for an adonis2 {vegan}
     # PERMANOVA fit, using the residual sum of squares in place of a true
     # likelihood (adonis2 does not return one).
     #
     # Args:
     #   adonis2.model: the table returned by vegan::adonis2(), exposing
     #     $SumOfSqs and $Df with the residual row second-to-last and the
     #     total row last.
     #
     # Returns:
     #   A named list: AIC, AIC.g, AICc, AIC.MSE, AICc.MSE, AIC.pi, AICc.pi
     #   and k (estimated parameters, n minus residual df).
     #
     # Adapted from:
     # https://github.com/kdyson/R_Scripts/blob/master/AICc_PERMANOVA.R

     # Check the object looks like adonis2 output. The original tested
     # is.na(adonis2.model$SumOfSqs[1]) directly, but for a foreign object
     # with no $SumOfSqs that evaluates to is.na(NULL) == logical(0) and
     # if() aborts with "argument is of length zero" instead of the intended
     # message; test for NULL first.
     if (is.null(adonis2.model$SumOfSqs) || is.na(adonis2.model$SumOfSqs[1]))
         stop("object not output of adonis2 {vegan} ", call. = FALSE)

     # Residual row is second-to-last; last row is the total.
     n.rows <- length(adonis2.model$Df)
     RSS <- adonis2.model$SumOfSqs[n.rows - 1]
     MSE <- RSS / adonis2.model$Df[n.rows - 1]

     # Number of observations: total df + 1.
     nn <- adonis2.model$Df[n.rows] + 1

     # Number of estimated parameters: observations minus residual df.
     k <- nn - adonis2.model$Df[n.rows - 1]

     # AIC : 2*k + n*ln(RSS)
     # AICc: AIC + [2k(k+1)]/(n-k-1)
     # AIC.g is a generalized version of AIC = 2k + n [Ln( 2(pi) RSS/n ) + 1]
     # AIC.pi = k + n [Ln( 2(pi) RSS/(n-k) ) + 1]
     # based on https://en.wikipedia.org/wiki/Akaike_information_criterion
     AIC <- 2*k + nn*log(RSS)
     AIC.g <- 2*k + nn * (1 + log( 2 * pi * RSS / nn))
     AIC.MSE <- 2*k + nn * log(MSE)
     AIC.pi <- k + nn*(1 + log( 2*pi*RSS/(nn-k) ))

     # Small-sample correction, shared by all AICc variants.
     correction <- (2*k*(k + 1))/(nn - k - 1)
     AICc <- AIC + correction
     AICc.MSE <- AIC.MSE + correction
     AICc.pi <- AIC.pi + correction

     list("AIC" = AIC, "AIC.g" = AIC.g, "AICc" = AICc,
          "AIC.MSE" = AIC.MSE, "AICc.MSE" = AICc.MSE,
          "AIC.pi" = AIC.pi, "AICc.pi" = AICc.pi, "k" = k)
}

# Compare the three candidate adonis2 models; for each criterion, lower
# values indicate better expected fit after penalizing model complexity.
AICc.PERMANOVA2(adonis2.model = temporal.model1)
## $AIC
## [1] 380.5788
## 
## $AIC.g
## [1] 189.89
## 
## $AICc
## [1] 431.8288
## 
## $AIC.MSE
## [1] -57.73189
## 
## $AICc.MSE
## [1] -6.481886
## 
## $AIC.pi
## [1] 200.2452
## 
## $AICc.pi
## [1] 251.4952
## 
## $k
## [1] 40
AICc.PERMANOVA2(adonis2.model = temporal.model2)
## $AIC
## [1] 382.0189
## 
## $AIC.g
## [1] 191.3301
## 
## $AICc
## [1] 426.9279
## 
## $AIC.MSE
## [1] -59.47387
## 
## $AICc.MSE
## [1] -14.56478
## 
## $AIC.pi
## [1] 200.5032
## 
## $AICc.pi
## [1] 245.4123
## 
## $k
## [1] 38
AICc.PERMANOVA2(adonis2.model = temporal.model3)
## $AIC
## [1] 382.1366
## 
## $AIC.g
## [1] 191.4478
## 
## $AICc
## [1] 427.0457
## 
## $AIC.MSE
## [1] -59.35614
## 
## $AICc.MSE
## [1] -14.44704
## 
## $AIC.pi
## [1] 200.621
## 
## $AICc.pi
## [1] 245.53
## 
## $k
## [1] 38

==================================

CORRELATION COEFFICIENTS

==================================

# Calculate correlation coefficients to tell directions of trends
# Load datasets 
distance <- read.table("distance-matrix.txt")
D <- as.matrix(distance)
nmd.distance <- read.table("distance-matrix-nmd.txt")
ND <- as.matrix(nmd.distance)
nmd.genetic <- read.table("covariance-matrix-nmd.txt")
NG <- as.matrix(nmd.genetic)
nmd.clim.var <- read_excel("ClimVarMod-NoMissingData.xlsx")
# Truncate negative covariances to zero, mirroring the transformation applied
# before the adonis2 models (the original comment said "remove zeroes", but
# the code replaces negatives with zero).
NG2 <- as.matrix(nmd.genetic)
NG2[NG2 < 0] <- 0

# Convert each variable to the pairwise-distance vectors the cor.test calls
# below operate on.
# NOTE(review): "ndasy" is a typo for "ndays"; the names are kept because
# the subsequent cor.test calls reference them.
covariance <- as.numeric(as.dist(NG))
distance <- as.numeric(as.dist(ND))
year <- as.numeric(dist(nmd.clim.var$Year))
mean.temp <- as.numeric(dist(nmd.clim.var$ScaledChangeMeanTemp))
ndasy35 <- as.numeric(dist(nmd.clim.var$ScaledNdays35five))
ndasy5 <- as.numeric(dist(nmd.clim.var$ScaledNdays5five))
covariance.zero <- as.numeric(as.dist(NG2))
# Interaction terms as element-wise products of the distance vectors.
mean.temp.35 <- mean.temp * ndasy35
mean.temp.5 <- mean.temp * ndasy5
rain.change <- as.numeric(dist(nmd.clim.var$ScaledChangeRain))
# (a redundant earlier assignment of `rain` from ScaledChangeRain was removed;
# it was overwritten here before ever being used)
rain <- as.numeric(dist(nmd.clim.var$ScaledRainfive))
rain.change.rain <- rain.change * rain

# calculate pearson's correlation co-efficients
# Each test correlates the (zero-truncated) pairwise genetic covariance with
# one pairwise climate/temporal distance vector; note every estimated
# correlation below is negative.
cor.test(covariance.zero, year, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  covariance.zero and year
## t = -10.969, df = 5458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1727214 -0.1208133
## sample estimates:
##        cor 
## -0.1468685
cor.test(covariance.zero, mean.temp.35, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  covariance.zero and mean.temp.35
## t = -12.345, df = 5458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1905046 -0.1388929
## sample estimates:
##        cor 
## -0.1648116
cor.test(covariance.zero, mean.temp, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  covariance.zero and mean.temp
## t = -5.5136, df = 5458, p-value = 3.678e-08
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.10075044 -0.04799239
## sample estimates:
##        cor 
## -0.0744235
cor.test(covariance.zero, ndasy35, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  covariance.zero and ndasy35
## t = -18.705, df = 5458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2702083 -0.2203504
## sample estimates:
##        cor 
## -0.2454417
cor.test(covariance.zero, ndasy5, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  covariance.zero and ndasy5
## t = -12.666, df = 5458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1946294 -0.1430914
## sample estimates:
##        cor 
## -0.1689759
cor.test(covariance.zero, mean.temp.5, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  covariance.zero and mean.temp.5
## t = -9.2869, df = 5458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.15075133 -0.09852435
## sample estimates:
##        cor 
## -0.1247242
cor.test(covariance.zero, rain.change, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  covariance.zero and rain.change
## t = -14.461, df = 5458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2175136 -0.1664183
## sample estimates:
##        cor 
## -0.1920961
cor.test(covariance.zero, rain, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  covariance.zero and rain
## t = -14.989, df = 5458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2241750 -0.1732193
## sample estimates:
##        cor 
## -0.1988315
cor.test(covariance.zero, rain.change.rain, method=c("pearson"))
## 
##  Pearson's product-moment correlation
## 
## data:  covariance.zero and rain.change.rain
## t = -12.25, df = 5458, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1892881 -0.1376551
## sample estimates:
##        cor 
## -0.1635836

==================================

MIXED MODELS: GRAPHS

==================================

# Load libraries 
library(ggplot2)
library(readxl)

# Load datasets (full tropical + temperate set, including missing data)
distance <- read.table("distance-matrix.txt")
D <- as.matrix(distance)
genmatrix <- read.table("covariance-matrix.txt")
G <- as.matrix(genmatrix)
climate.var <- read_excel("ClimVarMod.xlsx")
          
# Create the dataframe for the hexplots: every variable is converted to its
# pairwise-distance vector so rows correspond to pairs of individuals
covariance <- as.numeric(as.dist(G))
distance <- as.numeric(as.dist(D))
year <- as.numeric (dist(climate.var$Year))
rain <- as.numeric(dist(climate.var$ScaledChangeRain))
mean.temp <- as.numeric(dist(climate.var$ScaledChangeMeanTemp))
hex.data <- data.frame(year, covariance, rain, mean.temp, distance)

# Plot hexbin densities of genetic covariance against each pairwise variable
# Genetic covariance and pairwise temporal distance between samples 
ggplot(hex.data, aes(covariance, year)) + geom_hex() + labs(x = "Genetic Covariance", y = "Age Difference (Years)") + scale_fill_gradient(low = "gray25", high = "royalblue3") + theme_bw()

# Genetic covariance and pairwise spatial distance between samples 
ggplot(hex.data, aes(covariance, distance)) + geom_hex() + labs(x = "Genetic Covariance", y = "Distance between Individuals (m)") + scale_fill_gradient(low = "gray25", high = "royalblue3") + theme_bw()

# Genetic covariance and pairwise difference in rate of mean temperature 
# change (uses ggplot's default fill scale, unlike the two plots above)
ggplot(hex.data, aes(covariance, mean.temp)) + geom_hex() + labs(x = "Genetic Covariance", y = "Difference in Rate of Mean Temperature Change (°C)") + theme_bw()

# Genetic covariance and pairwise difference in rate of change in annual 
# rainfall